{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-07-08T03:05:52.511593Z",
     "start_time": "2025-07-08T03:05:52.456060Z"
    }
   },
   "source": [
    "from typing import Optional\n",
    "\n",
    "from pydantic import BaseModel, Field\n",
    "\n",
    "\n",
    "class Person(BaseModel):\n",
    "    \"\"\"Information about a person.\"\"\"\n",
    "\n",
    "    # ^ Doc-string for the entity Person.\n",
    "    # This doc-string is sent to the LLM as the description of the schema Person,\n",
    "    # and it can help to improve extraction results.\n",
    "\n",
    "    # Note that:\n",
    "    # 1. Each field is an `optional` -- this allows the model to decline to extract it!\n",
    "    # 2. Each field has a `description` -- this description is used by the LLM.\n",
    "    # Having a good description can help improve extraction results.\n",
    "    name: Optional[str] = Field(default=None, description=\"The name of the person\")\n",
    "    hair_color: Optional[str] = Field(\n",
    "        default=None, description=\"The color of the person's hair if known\"\n",
    "    )\n",
    "    height_in_meters: Optional[str] = Field(\n",
    "        default=None, description=\"Height measured in meters\"\n",
    "    )"
   ],
   "outputs": [],
   "execution_count": 1
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-08T03:08:59.609978Z",
     "start_time": "2025-07-08T03:08:59.468036Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
    "\n",
    "# Define a custom prompt to provide instructions and any additional context.\n",
    "# 1) You can add examples into the prompt template to improve extraction quality\n",
    "# 2) Introduce additional parameters to take context into account (e.g., include metadata\n",
    "#    about the document from which the text was extracted.)\n",
    "prompt_template = ChatPromptTemplate.from_messages(\n",
    "    [\n",
    "        (\n",
    "            \"system\",\n",
    "            \"You are an expert extraction algorithm. \"\n",
    "            \"Only extract relevant information from the text. \"\n",
    "            \"If you do not know the value of an attribute asked to extract, \"\n",
    "            \"return null for the attribute's value.\",\n",
    "        ),\n",
    "        # Please see the how-to about improving performance with\n",
    "        # reference examples.\n",
    "        # MessagesPlaceholder('examples'),\n",
    "        (\"human\", \"{text}\"),\n",
    "    ]\n",
    ")"
   ],
   "id": "dc1759f0b382214a",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/envs/langchain/lib/python3.12/site-packages/requests/__init__.py:86: RequestsDependencyWarning: Unable to find acceptable character detection dependency (chardet or charset_normalizer).\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "execution_count": 2
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-08T03:10:01.642789Z",
     "start_time": "2025-07-08T03:10:01.240369Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from dotenv import load_dotenv\n",
    "load_dotenv()\n",
    "import getpass\n",
    "import os\n",
    "\n",
    "if not os.environ.get(\"DEEPSEEK_API_KEY\"):\n",
    "  os.environ[\"DEEPSEEK_API_KEY\"] = getpass.getpass(\"Enter API key for Google Gemini: \")\n",
    "\n",
    "from langchain.chat_models import init_chat_model\n",
    "\n",
    "llm = init_chat_model(\"deepseek-chat\")"
   ],
   "id": "d7164a7f2f113db8",
   "outputs": [],
   "execution_count": 3
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-08T03:10:19.532974Z",
     "start_time": "2025-07-08T03:10:19.527492Z"
    }
   },
   "cell_type": "code",
   "source": "structured_llm = llm.with_structured_output(schema=Person)",
   "id": "f5182dfad04d45da",
   "outputs": [],
   "execution_count": 4
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-08T03:10:42.649401Z",
     "start_time": "2025-07-08T03:10:37.342546Z"
    }
   },
   "cell_type": "code",
   "source": [
    "text = \"Alan Smith is 6 feet tall and has blond hair.\"\n",
    "prompt = prompt_template.invoke({\"text\": text})\n",
    "structured_llm.invoke(prompt)"
   ],
   "id": "790213db4d4a4bdd",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Person(name='Alan Smith', hair_color='blond', height_in_meters=None)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 5
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "## Multiple Entities",
   "id": "b0fd3ffeb896825a"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-08T03:12:09.728811Z",
     "start_time": "2025-07-08T03:12:09.722847Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from typing import List, Optional\n",
    "\n",
    "from pydantic import BaseModel, Field\n",
    "\n",
    "\n",
    "class Person(BaseModel):\n",
    "    \"\"\"Information about a person.\"\"\"\n",
    "\n",
    "    # ^ Doc-string for the entity Person.\n",
    "    # This doc-string is sent to the LLM as the description of the schema Person,\n",
    "    # and it can help to improve extraction results.\n",
    "\n",
    "    # Note that:\n",
    "    # 1. Each field is an `optional` -- this allows the model to decline to extract it!\n",
    "    # 2. Each field has a `description` -- this description is used by the LLM.\n",
    "    # Having a good description can help improve extraction results.\n",
    "    name: Optional[str] = Field(default=None, description=\"The name of the person\")\n",
    "    hair_color: Optional[str] = Field(\n",
    "        default=None, description=\"The color of the person's hair if known\"\n",
    "    )\n",
    "    height_in_meters: Optional[str] = Field(\n",
    "        default=None, description=\"Height measured in meters\"\n",
    "    )\n",
    "\n",
    "\n",
    "class Data(BaseModel):\n",
    "    \"\"\"Extracted data about people.\"\"\"\n",
    "\n",
    "    # Creates a model so that we can extract multiple entities.\n",
    "    people: List[Person]"
   ],
   "id": "4eb834a203d9e59d",
   "outputs": [],
   "execution_count": 6
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-08T03:12:26.552535Z",
     "start_time": "2025-07-08T03:12:20.489729Z"
    }
   },
   "cell_type": "code",
   "source": [
    "structured_llm = llm.with_structured_output(schema=Data)\n",
    "text = \"My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me.\"\n",
    "prompt = prompt_template.invoke({\"text\": text})\n",
    "structured_llm.invoke(prompt)"
   ],
   "id": "6fd270291f012041",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Data(people=[Person(name='Jeff', hair_color='black', height_in_meters='1.8288'), Person(name='Anna', hair_color='black', height_in_meters=None)])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 7
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-08T03:20:56.474481Z",
     "start_time": "2025-07-08T03:20:39.051168Z"
    }
   },
   "cell_type": "code",
   "source": [
    "messages = [\n",
    "    {\"role\": \"user\", \"content\": \"2 🦜 2\"},\n",
    "    {\"role\": \"assistant\", \"content\": \"4\"},\n",
    "    {\"role\": \"user\", \"content\": \"2 🦜 3\"},\n",
    "    {\"role\": \"assistant\", \"content\": \"5\"},\n",
    "    {\"role\": \"user\", \"content\": \"3 🦜 4\"},\n",
    "]\n",
    "\n",
    "response = llm.invoke(messages)\n",
    "print(response.content)"
   ],
   "id": "c7f09fb0e8ace062",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7  \n",
      "\n",
      "The pattern seems to be adding the two numbers and then adding 1 (possibly representing the parrot 🦜).  \n",
      "\n",
      "Here's the breakdown:  \n",
      "- **2 🦜 2** → 2 + 2 + 1 = **5** (but you initially said 4, so the rule might differ)  \n",
      "- **2 🦜 3** → 2 + 3 = **5** (matches)  \n",
      "- **3 🦜 4** → 3 + 4 = **7** (matches)  \n",
      "\n",
      "If the parrot 🦜 means simple addition (without +1), then the answer is **7**.  \n",
      "\n",
      "But if the parrot adds 1 (like a \"squawk bonus\"), then:  \n",
      "- **2 🦜 2** → 2 + 2 + 1 = **5** (not 4)  \n",
      "- **2 🦜 3** → 2 + 3 + 1 = **6** (not 5)  \n",
      "- **3 🦜 4** → 3 + 4 + 1 = **8** (not 7)  \n",
      "\n",
      "Since your answers suggest **🦜 = plain addition**, the correct answer is:  \n",
      "### **7** (3 + 4)  \n",
      "\n",
      "Let me know if the parrot has a different rule! 😊\n"
     ]
    }
   ],
   "execution_count": 8
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-08T06:24:33.799779Z",
     "start_time": "2025-07-08T06:24:33.790709Z"
    }
   },
   "cell_type": "code",
   "source": [
    "from langchain_core.utils.function_calling import tool_example_to_messages\n",
    "\n",
    "examples = [\n",
    "    (\n",
    "        \"The ocean is vast and blue. It's more than 20,000 feet deep.\",\n",
    "        Data(people=[]),\n",
    "    ),\n",
    "    (\n",
    "        \"Fiona traveled far from France to Spain.\",\n",
    "        Data(people=[Person(name=\"Fiona\", height_in_meters=None, hair_color=None)]),\n",
    "    ),\n",
    "]\n",
    "\n",
    "\n",
    "messages = []\n",
    "\n",
    "for txt, tool_call in examples:\n",
    "    if tool_call.people:\n",
    "        # This final message is optional for some providers\n",
    "        ai_response = \"Detected people.\"\n",
    "    else:\n",
    "        ai_response = \"Detected no people.\"\n",
    "    messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))"
   ],
   "id": "dea13c32cdfadca4",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/cf/2vzq012x5db6bdr3tyd_57lc0000gn/T/ipykernel_23741/1168640015.py:23: LangChainBetaWarning: The function `tool_example_to_messages` is in beta. It is actively being worked on, so the API may change.\n",
      "  messages.extend(tool_example_to_messages(txt, [tool_call], ai_response=ai_response))\n"
     ]
    }
   ],
   "execution_count": 9
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-08T06:31:52.345200Z",
     "start_time": "2025-07-08T06:31:48.161111Z"
    }
   },
   "cell_type": "code",
   "source": [
    "message_no_extraction = {\n",
    "    \"role\": \"user\",\n",
    "    \"content\": \"The solar system is large, but earth has only 1 moon.\",\n",
    "}\n",
    "\n",
    "structured_llm = llm.with_structured_output(schema=Data)\n",
    "structured_llm.invoke([message_no_extraction])"
   ],
   "id": "b9c747e72e22d317",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Data(people=[])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 11
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-07-08T06:33:36.439028Z",
     "start_time": "2025-07-08T06:33:32.233179Z"
    }
   },
   "cell_type": "code",
   "source": "structured_llm.invoke(messages + [message_no_extraction])",
   "id": "98eb2c87ba1e6504",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Data(people=[])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 12
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
